Final Project Code and Shiny App: Tour de France

Author

Tristan Hamilton

Data: https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-04-07/readme.md

The shiny app can be found below. The subsequent code is for wrangling, and experimenting with visualizations before implementing them into the app.

## DATA
library(tdf)
library(tidyverse)
library(lubridate)
library(plotly)
library(readr)

tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')

tuesdata <- tidytuesdayR::tt_load('2020-04-07')

    Downloading file 1 of 3: `stage_data.csv`
    Downloading file 2 of 3: `tdf_stages.csv`
    Downloading file 3 of 3: `tdf_winners.csv`
tuesdata <- tidytuesdayR::tt_load(2020, week = 15)

    Downloading file 1 of 3: `stage_data.csv`
    Downloading file 2 of 3: `tdf_stages.csv`
    Downloading file 3 of 3: `tdf_winners.csv`
winners <- tdf::editions %>% 
  select(-stage_results)

all_years <- tdf::editions %>% 
  unnest_longer(stage_results) %>% 
  mutate(stage_results = map(stage_results, ~ mutate(.x, rank = as.character(rank)))) %>% 
  unnest_longer(stage_results) 

stage_all <- all_years %>% 
  select(stage_results) %>% 
  flatten_df()

combo_df <- bind_cols(all_years, stage_all) %>% 
  select(-stage_results)

stage_clean <- combo_df %>% 
  select(edition, start_date,stage_results_id:last_col()) %>% 
  mutate(year = lubridate::year(start_date)) %>% 
  rename(age = age...25) %>% 
  select(edition, year, everything(), -start_date)

tdf_winners <- tuesdata$tdf_winners

Pre-shiny wrangling:

winners <- winners |>
  mutate(year = year(start_date))

winners <- winners |>
  mutate(year_born = year(born)) |>
  mutate(year_died = year(died)) |>
  mutate(lifespan = year_died - year_born)

winners_age <- winners |> group_by(edition, year, winner_name, winner_team) |>
  summarise(winner_age = mean(age))
`summarise()` has grouped output by 'edition', 'year', 'winner_name'. You can
override using the `.groups` argument.
winners_wins <- winners |> group_by(edition, year, winner_name, winner_team) |>
  summarise(stage_wins = sum(stage_wins))
`summarise()` has grouped output by 'edition', 'year', 'winner_name'. You can
override using the `.groups` argument.
winners <- winners |> mutate(age_range = case_when(age <= 23 ~ 
                                                           "Young (< 23)",
                                                         age > 23 & age <= 32 ~ 
                                                           "Average (24-32)",
                                                         age > 32 ~ 
                                                           "Old (> 32)"))
age_bracket_count <- winners |> group_by(age_range) |>
  summarise(num = n())


overall_age <- mean(winners$age)
overall_age2 <- median(winners$age)

winners_df2 <- winners_age |> left_join(winners_wins, by = c("edition", "year", "winner_name", "winner_team"))

winners_df3 <- winners |>
  select(winner_name, distance, time_overall, time_margin)

top_riders <- winners_df2 |> group_by(winner_name) |>
  summarise(tdf_wins = n()) |>
  arrange(desc(tdf_wins)) |>
  slice(1:6)

winners_plot <- ggplot(data = winners_df2, aes(x = year, y = winner_age, label = winner_name, label2 = stage_wins)) +
  geom_line(colour = "deepskyblue1") +
  geom_point(size = 0.75) +
  geom_hline(yintercept = overall_age, colour = "red", linewidth = 0.5) +
  # geom_hline(yintercept = overall_age2, colour = "darkorange", linewidth = 0.5) +
  theme_classic() +
  labs(title = "Ages of Tour de France Winners",
       x = "Year",
       y = "Age of Winner") 

ggplotly(winners_plot)
# age stuff
average_age <- stage_clean |> group_by(year) |>
  summarise(mean_year_age = mean(age))

stage_clean2000 <- stage_clean |>
  filter(year >= 2000)

average_age2000 <- stage_clean2000 |> group_by(team, year) |>
  summarise(avg_age = mean(age))
`summarise()` has grouped output by 'team'. You can override using the
`.groups` argument.
winners2000 <- winners |>
  filter(year >= 2000)

winner_join <- inner_join(winners2000, stage_clean2000)
Joining with `by = join_by(edition, age, year)`
team_age <- winner_join |> group_by(winner_team) |>
  summarise(team_mean_age = mean(age))

winner_age <- winners2000 |> group_by(winner_name, winner_team) |>
  summarise(winner_mean_age = mean(age))
`summarise()` has grouped output by 'winner_name'. You can override using the
`.groups` argument.
age_joined <- winner_age |> left_join(team_age, by = c("winner_team"))

## age comparison
age_dc <- age_joined |> 
  filter(winner_team == "Discovery Channel")


ggplot(data = age_dc, aes(x = reorder(winner_team, team_mean_age), 
                              y = team_mean_age)) +
  geom_point(colour = "red3", size = 6) +
  geom_point(aes(x = winner_name, y = winner_mean_age), 
             colour = "yellow3", size = 6) +
  labs(x = "Name",
       y = "Age") +
  theme_classic() +
  ylim(20, 35)

## country wins
country_wins <- winners |> group_by(nationality) |>
  summarise(country_wins = n()) |>
  arrange(desc(country_wins)) |>
  slice(1:7)

plot_test <- ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                             y = country_wins, 
                                             text = paste("TDF Wins: ", country_wins))) +
  geom_col(fill = "darkorange2", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Country",
       y = "Number of TDF Wins")

ggplotly(plot_test, tooltip = "text")
## country wins 2
country_wins <- winners |> group_by(nationality) |>
  summarise(country_wins = n()) |>
  arrange(desc(country_wins)) 

plot_test2 <- ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                              y = country_wins, 
                                              text = paste("TDF Wins: ", country_wins))) +
  geom_col(fill = "deepskyblue3", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Country",
       y = "Number of TDF Wins")

ggplotly(plot_test2, tooltip = "text")
## top riders
rider_wins <- winners |> group_by(winner_name) |>
  summarise(num_wins = n()) |>
  arrange(desc(num_wins)) |>
  slice(1:9)

winners_year75 <- winners |>
  filter(year >= 1975)

winners_year00 <- winners |> 
  filter(year < 1975)

plot_test3 <- ggplot(data = rider_wins, aes(x = reorder(winner_name, num_wins), 
                                             y = num_wins, 
                                             text = paste("TDF Wins: ", num_wins))) +
  geom_col(fill = "deepskyblue4", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Rider Name",
       y = "Number of TDF Wins")

ggplotly(plot_test3, tooltip = "text")
# team wins
team_wins <- winners |> group_by(winner_team) |>
  summarise(team_wins = n()) |>
  arrange(desc(team_wins)) |>
  slice(1:11)

SHINY APP:

library(shinythemes)
library(shiny)
library(ggplot2)
library(plotly)
library(knitr)

rider_choices <- winners_df2 |> distinct(winner_name) |> pull(winner_name)
team_choices <- winner_join |> distinct(winner_team) |> pull(winner_team)
ui <- fluidPage(
  theme = shinytheme("united"),
  tabsetPanel(
    tabPanel("Age of Winners",
             mainPanel(
               selectInput("rider_sel",
                           label = "Choose a Tour de France Winner: (chronological)",
                           choices = rider_choices),
               helpText("Each data point is a Tour de France winner. The horizontal 
                        line represents the mean winner age across all editions, 
                        27.7 years old"),
               helpText("Note: the red-highlighted point(s) display the winning year(s) 
               and respective age of the user-selected champion."),
               plotOutput("winners_plot2"),
               tableOutput("data_table_react"),
             )),
    tabPanel("Age Comparison",
             selectInput("team_sel",
                         label = "Choose a winning team (2000-2019):",
                         choices = team_choices),
                            helpText("The selected team is displayed with its corresponding
                                     Tour de France winner(s)."),
             helpText("Note: Team Age is calculated as the cumulative average of each team's 
                                     mean rider age from 2000 to 2019."),
             mainPanel(
               plotlyOutput("age_plot"),
               tableOutput("team_age_react"),
             )
    ),
    tabPanel("Top Riders",
             radioButtons("output_sel",
                          label = "Choose Display:",
                          choices = c("All-time", "1975 - present", "First Edition - 1974"),
                          selected = "All-time"),
             mainPanel(
               plotlyOutput("rider_plot"),
             )
    ),
    tabPanel("Top Countries",
             radioButtons("output_select",
                          label = "Choose Display:",
                          choices = c("Top-Performing Countries", "All Countries"),
                          selected = "Top-Performing Countries"),
             mainPanel(
               plotlyOutput("country_plot"),
             )
    ),
    tabPanel("Top Teams",
             mainPanel(
               plotlyOutput("team_plot"),
             ))
  ),
)

server <- function(input, output, session) {
  
  winner_reactive <- reactive({
    winners_df2 |> filter(winner_name == input$rider_sel)
  })
  
  output$winners_plot2 <- renderPlot({
    ggplot(data = winners_df2, aes(x = year, y = winner_age)) +
      geom_line(colour = "deepskyblue1") +
      geom_point(size = 0.75) +
      geom_point(data = winner_reactive(), aes(x = year, y = winner_age), 
                 colour = "red", size = 4) +
      geom_hline(yintercept = overall_age, colour = "darkorange", linewidth = 0.5) +
      theme_classic(base_size = 18) +
      labs(title = "Age: Tour de France Winners",
           x = "Year",
           y = "Age (years)")
  })
  output$data_table_react <- renderTable({
    winner_reactive() 
  })
  
  ## rider win output 
  output$rider_plot <- renderPlotly({
    if (input$output_sel == "All-time") {
      rider_wins <- winners |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:9)
      
      ggplotly(
        ggplot(data = rider_wins, aes(x = reorder(winner_name, num_wins), 
                                      y = num_wins, 
                                      text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue1", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (All-time)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else if (input$output_sel == "First Edition - 1974") {
      rider_wins3 <- winners_year00 |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:5)
      
      ggplotly(
        ggplot(data = rider_wins3, aes(x = reorder(winner_name, num_wins), 
                                       y = num_wins, 
                                       text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue4", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (First Edition - 1974)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else {
      rider_wins2 <- winners_year75 |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:8)
      
      ggplotly(
        ggplot(data = rider_wins2, aes(x = reorder(winner_name, num_wins), 
                                       y = num_wins, 
                                       text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (1975 - present)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
  })
  
  
  ## country win output
  output$country_plot <- renderPlotly({
    if (input$output_select == "Top-Performing Countries") {
      
      country_wins <- winners |> group_by(nationality) |>
        summarise(country_wins = n()) |>
        arrange(desc(country_wins)) |>
        slice(1:7)
      
      ggplotly(
        ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                        y = country_wins,
                                        text = paste("TDF Wins: ", country_wins))) +
          geom_col(fill = "aquamarine3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Top-Performing Countries",
               x = "Country",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else {
      country_wins <- winners |> group_by(nationality) |>
        summarise(country_wins = n()) |>
        arrange(desc(country_wins)) 
      
      ggplotly(
        ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                        y = country_wins,
                                        text = paste("TDF Wins: ", country_wins))) +
          geom_col(fill = "cadetblue3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "All Countries",
               x = "Country",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
  })
  
  ## team win output
  output$team_plot <- renderPlotly({
    
    team_wins <- winners |> group_by(winner_team) |>
      summarise(team_wins = n()) |>
      arrange(desc(team_wins)) |>
      slice(1:11)
    
    ggplotly(
      ggplot(data = team_wins, aes(x = reorder(winner_team, team_wins), 
                                   y = team_wins,
                                   text = paste("TDF Wins: ", team_wins))) +
        geom_col(fill = "deepskyblue2", colour = "black") +
        coord_flip() +
        theme_minimal() +
        labs(title = "Teams: Most Wins",
             x = "Team Name",
             y = "Number of TDF Wins"),
      tooltip = "text"
    )
  })
  
  team_age <- winner_join |> group_by(winner_team) |>
    summarise(team_mean_age = mean(age))
  
  winner_age <- winners2000 |> group_by(winner_name, winner_team) |>
    summarise(winner_mean_age = mean(age))
  
  age_joined <- winner_age |> left_join(team_age, by = c("winner_team"))
  
  age_reactive <- reactive({
    age_joined |> filter(winner_team == input$team_sel)
  })
  
  output$age_plot <- renderPlotly({
    
    ggplotly(
      ggplot(data = age_reactive(), aes(x = reorder(winner_team, team_mean_age), 
                                        y = team_mean_age, 
                                        text = paste("Team Age: ", 
                                                     round(team_mean_age, 1)))) +
        geom_point(colour = "red3", size = 6) +
        geom_point(aes(x = winner_name, y = winner_mean_age, 
                       text = paste("Winner Age: ", round(winner_mean_age, 1))), 
                   colour = "yellow2", size = 6) +
        labs(x = "Name",
             y = "Age") +
        theme_classic(base_size = 15) +
        ylim(20, 35),
      tooltip = "text"
    )
  })
}

shinyApp(ui, server)

Shiny applications not supported in static R Markdown documents